Feature Extraction


In [2]:
import pandas as pd

###
### Load Data Set
###
df=pd.read_csv(
    'cs-training.csv', 
    sep=',',
    header=0)
data = df.drop(
    df.columns[0], 
    axis=1)

# Drop rows with missing column data
data = data.dropna()

###
### Convert Data Into List Of Dict Records
###
data = data.to_dict(orient='records')

###
### Seperate Target and Outcome Features
###
from sklearn.feature_extraction import DictVectorizer
from pandas import DataFrame
vec = DictVectorizer()

df_data = vec.fit_transform(data).toarray()
feature_names = vec.get_feature_names()
df_data = DataFrame(
    df_data,
    columns=feature_names)
    
outcome_feature = df_data['SeriousDlqin2yrs']
target_features = df_data.drop('SeriousDlqin2yrs', axis=1)

Generate training and test set


In [3]:
###
### Generate Training and Testing Set 
###
from sklearn import cross_validation

"""
    X_1: independent (target) variables for first data set
    Y_1: dependent (outcome) variable for first data set
    X_2: independent (target) variables for the second data set
    Y_2: dependent (outcome) variable for the second data set
"""
X_1, X_2, Y_1, Y_2 = cross_validation.train_test_split(
    target_features, outcome_feature, test_size=0.5, random_state=0)

Define Classifier


In [5]:
###
### Define Classifier
###                             
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()

Train Classifier


In [6]:
###
### Train Classifier 
###                              
clf.fit(X_1,Y_1)


Out[6]:
GaussianNB()

Print Accuracy


In [11]:
###
### Print Accuracy and Confusion Matrix
###

output = clf.predict(X_2)

from sklearn.metrics import confusion_matrix
matrix = confusion_matrix(output, Y_2)
score = clf.score(X_2, Y_2)

In [15]:
print ("accuracy: {0}".format(score.mean()))


accuracy: 0.9294088301322025

In [16]:
print (matrix)


[[55798  4094]
 [  151    92]]

Save Classifier (A folder called model must be created first)


In [18]:
###
### Save Classifier
###
from sklearn.externals import joblib
joblib.dump(clf, 'model/nb.pkl')


Out[18]:
['model/nb.pkl',
 'model/nb.pkl_01.npy',
 'model/nb.pkl_02.npy',
 'model/nb.pkl_03.npy',
 'model/nb.pkl_04.npy',
 'model/nb.pkl_05.npy']

Ignore Warnings


In [20]:
import warnings
from flask.exthook import ExtDeprecationWarning

warnings.simplefilter('ignore', ExtDeprecationWarning)

Setup Flask


In [21]:
from flask import Flask
from flask.ext.restplus import Api
from flask.ext.restplus import fields
from sklearn.externals import joblib

app = Flask(__name__)

api = Api(
   app, 
   version='1.0', 
   title='Credit API',
   description='A simple Prediction API')

ns = api.namespace('approve_credit', 
   description='Approve Credit Operations')

Setup parser


In [22]:
parser = api.parser()
parser.add_argument(
   'RevolvingUtilizationOfUnsecuredLines', 
   type=float, 
   required=True, 
   help='Total balance on credit cards and personal lines of credit except real estate and no installment debt like car loans divided by the sum of credit limits', 
   location='form')
parser.add_argument(
   'age', 
   type=float, 
   required=True, 
   help='Age of borrower in years',
   location='form')
parser.add_argument(
   'NumberOfTime30-59DaysPastDueNotWorse', 
   type=float, 
   required=True, 
   help='Number of times borrower has been 30-59 days past due but no worse in the last 2 years.',
   location='form')
parser.add_argument(
   'DebtRatio', 
   type=float, 
   required=True, 
   help='Monthly debt payments, alimony,living costs divided by monthy gross income',
   location='form')
parser.add_argument(
   'MonthlyIncome', 
   type=float, 
   required=True, 
   help='Monthly income',
   location='form')
parser.add_argument(
   'NumberOfOpenCreditLinesAndLoans', 
   type=float, 
   required=True, 
   help='Number of Open loans (installment like car loan or mortgage) and Lines of credit (e.g. credit cards)',
   location='form')
parser.add_argument(
   'NumberOfTimes90DaysLate', 
   type=float, 
   required=True, 
   help='Number of times borrower has been 90 days or more past due.',
   location='form')
parser.add_argument(
   'NumberRealEstateLoansOrLines', 
   type=float, 
   required=True, 
   help='Number of mortgage and real estate loans including home equity lines of credit',
   location='form')
parser.add_argument(
   'NumberOfTime60-89DaysPastDueNotWorse', 
   type=float, 
   required=True, 
   help='Number of mortgage and real estate loans including home equity lines of credit',
   location='form')
parser.add_argument(
   'NumberOfDependents', 
   type=float, 
   required=True, 
   help='Number of mortgage and real estate loans including home equity lines of credit',
   location='form')


Out[22]:
<flask_restplus.reqparse.RequestParser at 0x1112afbe0>

Setup Web API


In [23]:
resource_fields = api.model('Resource', {
    'result': fields.String,
})

from flask.ext.restplus import Resource
@ns.route('/')
class CreditApi(Resource):

   @api.doc(parser=parser)
   @api.marshal_with(resource_fields)
   def post(self):
     args = parser.parse_args()
     result = self.get_result(args)

     return result, 201

   def get_result(self, args):
      debtRatio = args["DebtRatio"]
      monthlyIncome = args["MonthlyIncome"]
      dependents = args["NumberOfDependents"]
      openCreditLinesAndLoans = args["NumberOfOpenCreditLinesAndLoans"]
      pastDue30Days = args["NumberOfTime30-59DaysPastDueNotWorse"]
      pastDue60Days = args["NumberOfTime60-89DaysPastDueNotWorse"]
      pastDue90Days = args["NumberOfTimes90DaysLate"]
      realEstateLoansOrLines = args["NumberRealEstateLoansOrLines"]
      unsecuredLines = args["RevolvingUtilizationOfUnsecuredLines"]
      age = args["age"] 

      from pandas import DataFrame
      df = DataFrame([[
         debtRatio,
         monthlyIncome,
         dependents,
         openCreditLinesAndLoans,
         pastDue30Days,
         pastDue60Days,
         pastDue90Days,
         realEstateLoansOrLines,
         unsecuredLines,
         age
      ]])

      clf = joblib.load('model/nb.pkl');

      result = clf.predict(df)
      if(result[0] == 1.0): 
         result = "deny" 
      else: 
         result = "approve"

      return {
         "result": result
      }

if __name__ == '__main__':
    app.run(debug=True)


An exception has occurred, use %tb to see the full traceback.

SystemExit: 1
To exit: use 'exit', 'quit', or Ctrl-D.

In [ ]: